data-raw/read in onet crosswalks.R

library(readxl)
library(data.table)

#### 2000 to 2006 Crosswalk ####
soc00_06 <- as.data.table(readxl::read_xls("data/onet soc crosswalks/ONET SOC 2000 to 2006 Crosswalk.xls",
                                           range="A1:D1168")[,c(1,3)])

# soc 2000 - 2006
soc00_to_06 <- soc00_06 %>%
  mutate(id = rep(1,dim(soc00_06)[1])) %>%
  dplyr::rename("onetsoc2000code" = "O*NET-SOC 2000 Code",
                "onetsoc2006code" = "O*NET-SOC 2006 Code",
                "id" = "id") %>%
  group_by(onetsoc2000code) %>%
  mutate(dup.SOC2000 = sum(id)) %>%
  ungroup() %>%
  mutate(weight = (100/dup.SOC2000)/100 ) %>%
  select(c("onetsoc2000code","onetsoc2006code","weight"))

# soc 2006 - 2000
soc06_to_00 <- soc00_06 %>%
  mutate(id = rep(1,dim(soc00_06)[1])) %>%
  dplyr::rename("onetsoc2000code" = "O*NET-SOC 2000 Code",
                "onetsoc2006code" = "O*NET-SOC 2006 Code",
                "id" = "id") %>%
  group_by(onetsoc2006code) %>%
  mutate(dup.SOC2006 = sum(id)) %>%
  ungroup() %>%
  mutate(weight = (100/dup.SOC2006)/100 ) %>%
  select(c("onetsoc2006code","onetsoc2000code","weight"))

#### 2006 to 2009 Crosswalk ####
soc06_09 <- as.data.table(readxl::read_xls("data/onet soc crosswalks/ONET SOC 2006 to 2009 Crosswalk.xls",
                                           range="A1:D950")[,c(1,3)])

# soc 2006 - 2009
soc06_to_09 <- soc06_09 %>%
  mutate(id = rep(1,dim(soc06_09)[1])) %>%
  dplyr::rename("onetsoc2006code" = "O*NET-SOC 2006 Code",
                "onetsoc2009code" = "O*NET-SOC 2009 Code",
                "id" = "id") %>%
  group_by(onetsoc2006code) %>%
  mutate(dup.SOC2006 = sum(id)) %>%
  ungroup() %>%
  mutate(weight = (100/dup.SOC2006)/100 ) %>%
  select(c("onetsoc2006code","onetsoc2009code","weight"))

# soc 2009 - 2006
soc09_to_06 <- soc06_09 %>%
  mutate(id = rep(1,dim(soc06_09)[1])) %>%
  dplyr::rename("onetsoc2006code" = "O*NET-SOC 2006 Code",
                "onetsoc2009code" = "O*NET-SOC 2009 Code",
                "id" = "id") %>%
  group_by(onetsoc2009code) %>%
  mutate(dup.SOC2009 = sum(id)) %>%
  ungroup() %>%
  mutate(weight = (100/dup.SOC2009)/100 ) %>%
  select(c("onetsoc2009code","onetsoc2006code","weight"))

#### 2009 to 2010 Crosswalk ####
soc09_10 <- as.data.table(readxl::read_xls("data/onet soc crosswalks/ONET SOC 2009 to 2010 Crosswalk.xls",
                                           range="A1:D950")[,c(1,3)])

# soc 2009 - 2010
soc09_to_10 <- soc09_10 %>%
  mutate(id = rep(1,dim(soc09_10)[1])) %>%
  dplyr::rename("onetsoc2009code" = "O*NET-SOC 2009 Code",
                "onetsoc2010code" = "O*NET-SOC 2010 Code",
                "id" = "id") %>%
  group_by(onetsoc2009code) %>%
  mutate(dup.SOC2009 = sum(id)) %>%
  ungroup() %>%
  mutate(weight = (100/dup.SOC2009)/100 ) %>%
  select(c("onetsoc2009code","onetsoc2010code","weight"))

# soc 2010 - 2009
soc10_to_09 <- soc09_10 %>%
  mutate(id = rep(1,dim(soc09_10)[1])) %>%
  dplyr::rename("onetsoc2009code" = "O*NET-SOC 2009 Code",
                "onetsoc2010code" = "O*NET-SOC 2010 Code",
                "id" = "id") %>%
  group_by(onetsoc2010code) %>%
  mutate(dup.SOC2010 = sum(id)) %>%
  ungroup() %>%
  mutate(weight = (100/dup.SOC2010)/100 ) %>%
  select(c("onetsoc2010code","onetsoc2009code","weight"))
djmorris1989/onetmappinguk documentation built on June 14, 2020, 10:04 a.m.